This R-markdown file presents all analyses for our paper titled “Voice quality and coda /r/ in Glasgow English in the early 20th century”. The analyses are presented in the same order as they are in the paper, and the section numbering also follows our paper (which means that it actually starts at 4.1, which is the first results section in the paper).
We start by importing all relevant libraries and the data. The details of the data processing (e.g. exclusion of outliers) are shown in a separate file titled data_prep.Rmd.
Let’s import the data and relevant libraries first. Note that we have saved our statistical models as RDS files, which allows the user to save time by not having to refit them (some of our generalised additive mixed models may take between 10 minutes to an hour to fit). The user has the option of re-fitting all models by changing the value of the refit_please variable to TRUE below.
library(lme4)
library(mgcv)
library(itsadug)
library(rms)
library(tidyverse)
library(stringr)
library(effects)
library(here)
library(afex)
refit_please <- F # please don't refit all the models - just load them from RDS files
# vowel data
vowels <- read_csv("data/vowels.csv")
Parsed with column specification:
cols(
Speaker = [31mcol_character()[39m,
gender = [31mcol_character()[39m,
Target.stress = [31mcol_character()[39m,
Target.CELEX.phonemes = [31mcol_character()[39m,
Target.orthography = [31mcol_character()[39m,
Match.segments = [31mcol_character()[39m,
Target.segments = [31mcol_character()[39m,
Target.segments.start = [32mcol_double()[39m,
Target.segments.end = [32mcol_double()[39m,
time_0.5 = [32mcol_double()[39m,
vowel = [31mcol_character()[39m,
preceding = [31mcol_character()[39m,
following = [31mcol_character()[39m,
decade = [31mcol_character()[39m,
decade.fact = [31mcol_character()[39m,
duration = [32mcol_double()[39m,
f1 = [32mcol_double()[39m,
f2 = [32mcol_double()[39m,
f3 = [32mcol_double()[39m
)
# formatting for mixed effects models
vowels$Speaker <- as.factor(vowels$Speaker)
vowels$Target.orthography <- as.factor(vowels$Target.orthography)
vowels$vowel <- as.factor(vowels$vowel)
vowels$decade <- ifelse(vowels$decade=="00", 2000, as.numeric(vowels$decade) + 1900)
vowels$decade.fact <- factor(vowels$decade.fact, levels=c("70","80","90","00"))
# /r/ data
R <- read_csv("data/R.csv") # F/M confuse read_csv...
Parsed with column specification:
cols(
.default = col_double(),
speaker = [31mcol_character()[39m,
traj = [31mcol_character()[39m,
gender = [33mcol_logical()[39m,
word = [31mcol_character()[39m,
preceding = [31mcol_character()[39m,
stress = [31mcol_character()[39m,
following = [31mcol_character()[39m,
foll.broad.f2 = [31mcol_character()[39m,
foll.broad.f3 = [31mcol_character()[39m,
position = [31mcol_character()[39m,
trans = [31mcol_character()[39m,
exclude = [33mcol_logical()[39m,
comment = [31mcol_character()[39m
)
See spec(...) for full column specifications.
4554 parsing failures.
row col expected actual file
1090 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
1091 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
1092 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
1093 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
1094 gender 1/0/T/F/TRUE/FALSE M 'data/R.csv'
.... ...... .................. ...... ............
See problems(...) for more details.
coltyps <- spec(R)
coltyps$cols$gender <- col_character()
# and again!
R <- read_csv("data/R.csv", col_types=coltyps)
132 parsing failures.
row col expected actual file
1233 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
1234 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
1235 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
1236 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
1237 exclude 1/0/T/F/TRUE/FALSE unex 'data/R.csv'
.... ....... .................. ...... ............
See problems(...) for more details.
# some changes to variables to make them work with GAMMs
R <- R %>%
mutate(stress = as.ordered(stress),
start.event = (measurement.no==0),
foll.broad.f3 = as.factor(foll.broad.f3),
speaker = as.factor(speaker),
word = as.factor(word))
contrasts(R$stress) <- "contr.treatment"
# separate data sets for males and females
R.m <- R %>%
filter(gender=="M")
R.f <- R %>%
filter(gender=="F")
# setting up variables for analysis via GAMMs
R$trans.broad <- recode(R$trans, wa="a")
R$trans.broad.fact <- as.ordered(R$trans.broad)
contrasts(R$trans.broad.fact) <- "contr.treatment"
R$gender.ordered <- as.ordered(R$gender)
contrasts(R$gender.ordered) <- "contr.treatment"
# auditory R data -- create data set with unique realisations
R.aud <- R[!(R$trans %in% c("", "?r")) & R$measurement.no==0,]
# only a few weakened approximants, so fold these into other categories
R.aud <- R %>%
filter(!(trans %in% c("", "?r")),
!is.na(trans),
measurement.no==0) %>%
mutate(trans.broad=recode(trans, wa="a"),
gender=recode(gender, M="male", `F`="female"))
# auditory voice quality data
# getting f3 avgs
avg.f3 <- unique(select(R, speaker, avg.f3))
# reading & formatting data
R.vq <- read_csv("data/auditoryvq.csv") %>%
rename(speaker="speaker code",
advanced.tip.blade="advanced tip/blade",
tongue.body.height="tongue body height",
tongue.body.front.backness="tongue body front-backness") %>%
# join with avg.f3!
inner_join(avg.f3, by="speaker") %>%
mutate(gender=as.factor(ifelse(!is.na(str_match(speaker, "-m0")), "male", "female")),
decade=unlist(substr(speaker, 1, 2)),
decade=as.numeric(recode(decade, `00`="100")) + 1900)
Parsed with column specification:
cols(
`speaker number` = [32mcol_double()[39m,
`speaker code` = [31mcol_character()[39m,
`advanced tip/blade` = [32mcol_double()[39m,
`tongue body height` = [32mcol_double()[39m,
`tongue body front-backness` = [32mcol_double()[39m,
RTR = [32mcol_double()[39m,
nasalization = [32mcol_double()[39m,
whispery = [32mcol_double()[39m,
`sounds?` = [31mcol_character()[39m
)
Column `speaker` joining character vector and factor, coercing into character vector
Analysing males first, not controlling for baseline F3. Fitting three models:
if (refit_please) {
# model without AR
system.time(
R.m.mod.no.AR <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m)
)
# saveRDS(R.m.mod.no.AR, "models/R.m.mod.no.AR")
# extracting empirical value of autocorrelation at lag 1
R.m.rho <- start_value_rho(R.m.mod.no.AR)
# full model with AR
system.time(
R.m.mod.AR <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR, "models/R.m.mod.AR")
summary(R.m.mod.AR)
# nested model with AR
system.time(
R.m.mod.AR.min.decade <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
#s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.decade, "models/R.m.mod.AR.min.decade")
}
R.m.mod.AR <- readRDS("models/R.m.mod.AR")
R.m.mod.AR.min.decade <- readRDS("models/R.m.mod.AR.min.decade")
compareML(R.m.mod.AR, R.m.mod.AR.min.decade) # significant overall effect of decade
R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.m.mod.AR.min.decade: f3 ~ stress + s(measurement.no) + s(duration) + s(preceding.front,
k = 3) + s(measurement.no, by = stress) + ti(measurement.no,
duration) + ti(measurement.no, preceding.front, k = c(10,
3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
s(measurement.no, word, bs = "fs", m = 1, k = 4)
Chi-square test of ML scores
-----
AIC difference: -0.11, model R.m.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.848517, rho2 = 0.848517).
Further questions (only first two of these discussed in paper):
if (refit_please) {
# overall stress?
system.time(
R.m.mod.AR.min.stress <- bam(f3 ~ #stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
#s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.stress, "models/R.m.mod.AR.min.stress")
# shape change?
system.time(
R.m.mod.AR.min.decade.shape <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.decade.shape, "models/R.m.mod.AR.min.decade.shape")
# overall stress x decade?
system.time(
R.m.mod.AR.min.decade.stress <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.decade.stress, "models/R.m.mod.AR.min.decade.stress")
# shape stress x decade?
system.time(
R.m.mod.AR.min.decade.stress.shape <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.rho)
)
# saveRDS(R.m.mod.AR.min.decade.stress.shape, "models/R.m.mod.AR.min.decade.stress.shape")
}
R.m.mod.AR.min.stress <- readRDS("models/R.m.mod.AR.min.stress")
R.m.mod.AR.min.decade.shape <- readRDS("models/R.m.mod.AR.min.decade.shape")
R.m.mod.AR.min.decade.stress <- readRDS("models/R.m.mod.AR.min.decade.stress")
R.m.mod.AR.min.decade.stress.shape <- readRDS("models/R.m.mod.AR.min.decade.stress.shape")
# overall stress?
compareML(R.m.mod.AR, R.m.mod.AR.min.stress, suggest.report=T) # non-sig
R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.m.mod.AR.min.stress: f3 ~ s(measurement.no) + s(decade, k = 4) + s(duration) + s(preceding.front,
k = 3) + ti(measurement.no, decade, k = c(10, 4)) + ti(measurement.no,
duration) + ti(measurement.no, preceding.front, k = c(10,
3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
s(measurement.no, word, bs = "fs", m = 1, k = 4)
Report suggestion: The Chi-Square test on the ML scores indicates that model R.m.mod.AR is [not significantly?] better than model R.m.mod.AR.min.stress (X2(8.00)=6.385, p>.1).
-----
AIC difference: -8.00, model R.m.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.848517, rho2 = 0.848517).
# shape change?
compareML(R.m.mod.AR, R.m.mod.AR.min.decade.shape, suggest.report=T) # non-sig
R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.m.mod.AR.min.decade.shape: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k = c(10, 3)) + s(measurement.no,
foll.broad.f3, bs = "fs", m = 1, k = 4) + s(measurement.no,
speaker, bs = "fs", m = 1, k = 4) + s(measurement.no, word,
bs = "fs", m = 1, k = 4)
Report suggestion: The Chi-Square test on the ML scores indicates that model R.m.mod.AR is [not significantly?] better than model R.m.mod.AR.min.decade.shape (X2(6.00)=3.128, p>.1).
-----
AIC difference: 2.06, model R.m.mod.AR.min.decade.shape has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.848517, rho2 = 0.848517). Only small difference in ML...
# overall stress x decade?
compareML(R.m.mod.AR, R.m.mod.AR.min.decade.stress, suggest.report=T) # non-sig
R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.m.mod.AR.min.decade.stress: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
ti(measurement.no, decade, k = c(10, 4)) + ti(measurement.no,
duration) + ti(measurement.no, preceding.front, k = c(10,
3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
s(measurement.no, word, bs = "fs", m = 1, k = 4)
Report suggestion: The Chi-Square test on the ML scores indicates that model R.m.mod.AR is [not significantly?] better than model R.m.mod.AR.min.decade.stress (X2(5.00)=4.364, p>.1).
-----
AIC difference: -8.76, model R.m.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.848517, rho2 = 0.848517). Only small difference in ML...
# shape stress x decade?
compareML(R.m.mod.AR, R.m.mod.AR.min.decade.stress.shape) # non-sig
R.m.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.m.mod.AR.min.decade.stress.shape: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
Chi-square test of ML scores
-----
AIC difference: -5.97, model R.m.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.848517, rho2 = 0.848517). Only small difference in ML...
Creating figure 2 bottom panel showing the model predictions for males.
# extracting model predictions (using plot_smooth from itsadug)
R.m.full.plot <- plot_smooth(R.m.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="full"), rm.ranef=T, n.grid=100)$fv
Predictor decade is not a factor.
Summary:
* stress : factor; set to the value(s): full.
* measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
* decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
* duration : numeric predictor; set to the value(s): 0.122709575.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-m06. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
R.m.schwa.plot <- plot_smooth(R.m.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="schwa"), rm.ranef=T, n.grid=100)$fv
Predictor decade is not a factor.
Summary:
* stress : factor; set to the value(s): schwa.
* measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
* decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
* duration : numeric predictor; set to the value(s): 0.122709575.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-m06. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
R.m.plot <- rbind(R.m.full.plot, R.m.schwa.plot)
# formatting decade predictor
R.m.plot$decade <- factor(R.m.plot$decade, levels=c("1970","1980","1990","2000"))
# creating the plot
ggplot(R.m.plot, aes(x=measurement.no, y=fit, group=decade, col=decade, lty=decade)) +
geom_line(lwd=1) +
facet_grid(.~stress) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=decade), col=NA,col="grey", alpha=0.1) +
scale_color_manual(values=c("orange","darkorange3","deepskyblue1","deepskyblue4"),
name="decade of\nrecording") +
scale_linetype_discrete(name="decade of\nrecording") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2000, 3050)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("F3 changes for /r/ in males (not controlling for baseline F3)")
Duplicated aesthetics after name standardisation: colour
#ggsave("graphs/r_male_f3_change_no_control.pdf", width=8, height=4)
Analysing females, not controlling for baseline F3. Fitting three models:
if (refit_please) {
# model without AR
system.time(
R.f.mod.no.AR <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f)
)
# saveRDS(R.f.mod.no.AR, "models/R.f.mod.no.AR")
# extracting empirical value of autocorrelation at lag 1
R.f.rho <- start_value_rho(R.f.mod.no.AR)
# full model with AR
system.time(
R.f.mod.AR <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR, "models/R.f.mod.AR")
summary(R.f.mod.AR)
# nested model with AR
system.time(
R.f.mod.AR.min.decade <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
#s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.decade, "models/R.f.mod.AR.min.decade")
}
R.f.mod.AR <- readRDS("models/R.f.mod.AR")
R.f.mod.AR.min.decade <- readRDS("models/R.f.mod.AR.min.decade")
compareML(R.f.mod.AR, R.f.mod.AR.min.decade) # significant overall effect of decade
R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.f.mod.AR.min.decade: f3 ~ stress + s(measurement.no) + s(duration) + s(preceding.front,
k = 3) + s(measurement.no, by = stress) + ti(measurement.no,
duration) + ti(measurement.no, preceding.front, k = c(10,
3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
s(measurement.no, word, bs = "fs", m = 1, k = 4)
Chi-square test of ML scores
-----
AIC difference: -7.77, model R.f.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.850619, rho2 = 0.850619).
Further questions (only first two of these discussed in paper):
if (refit_please) {
# overall stress?
system.time(
R.f.mod.AR.min.stress <- bam(f3 ~ #stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
#s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.stress, "models/R.f.mod.AR.min.stress")
# shape change?
system.time(
R.f.mod.AR.min.decade.shape <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.decade.shape, "models/R.f.mod.AR.min.decade.shape")
# overall stress x decade?
system.time(
R.f.mod.AR.min.decade.stress <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.decade.stress, "models/R.f.mod.AR.min.decade.stress")
# shape stress x decade?
system.time(
R.f.mod.AR.min.decade.stress.shape <- bam(f3 ~ stress +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.rho)
)
# saveRDS(R.f.mod.AR.min.decade.stress.shape, "models/R.f.mod.AR.min.decade.stress.shape")
}
R.f.mod.AR.min.stress <- readRDS("models/R.f.mod.AR.min.stress")
R.f.mod.AR.min.decade.shape <- readRDS("models/R.f.mod.AR.min.decade.shape")
R.f.mod.AR.min.decade.stress <- readRDS("models/R.f.mod.AR.min.decade.stress")
R.f.mod.AR.min.decade.stress.shape <- readRDS("models/R.f.mod.AR.min.decade.stress.shape")
# overall stress?
compareML(R.f.mod.AR, R.f.mod.AR.min.stress, suggest.report=T) # sig
R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.f.mod.AR.min.stress: f3 ~ s(measurement.no) + s(decade, k = 4) + s(duration) + s(preceding.front,
k = 3) + ti(measurement.no, decade, k = c(10, 4)) + ti(measurement.no,
duration) + ti(measurement.no, preceding.front, k = c(10,
3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
s(measurement.no, word, bs = "fs", m = 1, k = 4)
Report suggestion: The Chi-Square test on the ML scores indicates that model R.f.mod.AR is [marginally / significantly?] better than model R.f.mod.AR.min.stress (X2(8.00)=9.201, p0.018).
-----
AIC difference: -2.30, model R.f.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.850619, rho2 = 0.850619).
# shape change?
compareML(R.f.mod.AR, R.f.mod.AR.min.decade.shape, suggest.report=T) # sig
R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.f.mod.AR.min.decade.shape: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k = c(10, 3)) + s(measurement.no,
foll.broad.f3, bs = "fs", m = 1, k = 4) + s(measurement.no,
speaker, bs = "fs", m = 1, k = 4) + s(measurement.no, word,
bs = "fs", m = 1, k = 4)
Report suggestion: The Chi-Square test on the ML scores indicates that model R.f.mod.AR is [marginally / significantly?] better than model R.f.mod.AR.min.decade.shape (X2(6.00)=8.210, p0.012).
-----
AIC difference: -9.33, model R.f.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.850619, rho2 = 0.850619).
# overall stress x decade?
compareML(R.f.mod.AR, R.f.mod.AR.min.decade.stress, suggest.report=T) # sig
R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.f.mod.AR.min.decade.stress: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
ti(measurement.no, decade, k = c(10, 4)) + ti(measurement.no,
duration) + ti(measurement.no, preceding.front, k = c(10,
3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
s(measurement.no, word, bs = "fs", m = 1, k = 4)
Report suggestion: The Chi-Square test on the ML scores indicates that model R.f.mod.AR is [marginally / significantly?] better than model R.f.mod.AR.min.decade.stress (X2(5.00)=6.830, p0.018).
-----
AIC difference: -4.35, model R.f.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.850619, rho2 = 0.850619).
# shape stress x decade?
compareML(R.f.mod.AR, R.f.mod.AR.min.decade.stress.shape) # sig
R.f.mod.AR: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.f.mod.AR.min.decade.stress.shape: f3 ~ stress + s(measurement.no) + s(decade, k = 4) + s(duration) +
s(preceding.front, k = 3) + s(measurement.no, by = stress) +
s(decade, k = 4, by = stress) + ti(measurement.no, decade,
k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
Chi-square test of ML scores
-----
AIC difference: -5.03, model R.f.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.850619, rho2 = 0.850619). Only small difference in ML...
Creating figure 2 top panel showing the model predictions for females.
# extracting model predictions (using plot_smooth from itsadug)
R.f.full.plot <- plot_smooth(R.f.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="full"), rm.ranef=T, n.grid=100)$fv
Predictor decade is not a factor.
Summary:
* stress : factor; set to the value(s): full.
* measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
* decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
* duration : numeric predictor; set to the value(s): 0.103923377.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): coronal. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
R.f.schwa.plot <- plot_smooth(R.f.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="schwa"), rm.ranef=T, n.grid=100)$fv
Predictor decade is not a factor.
Summary:
* stress : factor; set to the value(s): schwa.
* measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
* decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
* duration : numeric predictor; set to the value(s): 0.103923377.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): coronal. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
R.f.plot <- rbind(R.f.full.plot, R.f.schwa.plot)
# formatting decade predictor
R.f.plot$decade <- factor(R.f.plot$decade, levels=c("1970","1980","1990","2000"))
# creating plot
ggplot(R.f.plot, aes(x=measurement.no, y=fit, group=decade, col=decade, lty=decade)) +
geom_line(lwd=1) +
facet_grid(.~stress) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=decade), col=NA,col="grey", alpha=0.1) +
scale_color_manual(name="decade of\nrecording", values=c("orange","darkorange3","deepskyblue1","deepskyblue4")) +
scale_linetype_discrete(name="decade of\nrecording") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2000, 3050)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("F3 changes for /r/ in females (not controlling for baseline F3)")
Duplicated aesthetics after name standardisation: colour
#ggsave("graphs/r_female_f3_change_no_control.pdf", width=8, height=4)
First, we fit our model of dynamic F3 measurements as a function of /r/-realisation, whose predictions are plotted in figure 3.
if (refit_please) {
# model without AR
system.time(
R.shapes.mod.no.AR <- bam(f3 ~ stress + trans.broad.fact + gender +
# level 1 smooth terms
s(measurement.no) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(measurement.no, by=trans.broad.fact) +
s(measurement.no, by=gender) +
# level 2: 2d smooths
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R)
)
#saveRDS(R.shapes.mod.no.AR, "models/R.shapes.mod.no.AR")
# extracting empirical autocorrelation value at lag 1
R.shapes.rho <- start_value_rho(R.shapes.mod.no.AR)
# setting up start.event for use with AR1
R$start.event <- R$measurement.no == 0
# full model with AR
system.time(
R.shapes.mod.AR <- bam(f3 ~ stress + trans.broad.fact + gender +
# level 1 smooth terms
s(measurement.no) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(measurement.no, by=trans.broad.fact) +
s(measurement.no, by=gender) +
# level 2: 2d smooths
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R,
AR.start=R$start.event, rho=R.shapes.rho,
method="ML")
)
#saveRDS(R.shapes.mod.AR, "models/R.shapes.mod.AR")
summary(R.shapes.mod.AR)
# nested model
system.time(
R.shapes.mod.AR.min.trans <- bam(f3 ~ stress + gender + # trans.broad.fact +
# level 1 smooth terms
s(measurement.no) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(measurement.no, by=trans.broad.fact) +
s(measurement.no, by=gender) +
# level 2: 2d smooths
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R,
AR.start=R$start.event, rho=R.shapes.rho,
method="ML")
)
#saveRDS(R.shapes.mod.AR.min.trans, "models/R.shapes.mod.AR.min.trans")
}
R.shapes.mod.AR <- readRDS("models/R.shapes.mod.AR")
R.shapes.mod.AR.min.trans <- readRDS("models/R.shapes.mod.AR.min.trans")
compareML(R.shapes.mod.AR, R.shapes.mod.AR.min.trans)
R.shapes.mod.AR: f3 ~ stress + trans.broad.fact + gender + s(measurement.no) +
s(duration) + s(preceding.front, k = 3) + s(measurement.no,
by = stress) + s(measurement.no, by = trans.broad.fact) +
s(measurement.no, by = gender) + ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k = c(10, 3)) + s(measurement.no,
foll.broad.f3, bs = "fs", m = 1, k = 4) + s(measurement.no,
speaker, bs = "fs", m = 1, k = 4) + s(measurement.no, word,
bs = "fs", m = 1, k = 4)
R.shapes.mod.AR.min.trans: f3 ~ stress + gender + s(measurement.no) + s(duration) + s(preceding.front,
k = 3) + s(measurement.no, by = stress) + s(measurement.no,
by = gender) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
Chi-square test of ML scores
-----
AIC difference: -192.66, model R.shapes.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.860217, rho2 = 0.860217).
Now creating figure 3.
# extracting model predictions
R.shapes <- plot_smooth(R.shapes.mod.AR, view="measurement.no", plot_all="trans.broad.fact",
cond=list(gender="F"), rm.ranef=T, rug=F)[[1]]
Summary:
* stress : factor; set to the value(s): schwa.
* trans.broad.fact : factor; set to the value(s): a, i, o, r, t, wt.
* gender : factor; set to the value(s): F.
* measurement.no : numeric predictor; with 30 values ranging from 0.000000 to 10.000000.
* duration : numeric predictor; set to the value(s): 0.11452771.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
# releveling, meaningful names for realisations
R.shapes$trans.broad.fact <- factor(R.shapes$trans.broad.fact, levels=c("o","i","wt","a","t","r"))
R.shapes <- R.shapes %>%
mutate(realisation=recode(trans.broad.fact,
o="zero",
i="intermediate",
wt="weakened tap",
a="approximant",
t="full tap",
r="trill"),
realisation=factor(realisation,
levels=c("zero",
"intermediate",
"weakened tap",
"approximant",
"full tap",
"trill"))
)
# excluding trills
R.shapes <- filter(R.shapes, trans.broad.fact != "r")
# creating graph
ggplot(R.shapes, aes(x=measurement.no, y=fit, col=realisation, lty=realisation)) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=trans.broad.fact), alpha=0.05, colour=NA) +
geom_line(lwd=1) +
scale_linetype_manual(values = c(3,4,2,
5,1),
name="/r/ realisation") +
scale_colour_manual(values = c("orange","darkorange1",
"darkorange3","deepskyblue1",
"deepskyblue3"),
name="/r/ realisation") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2450,2950)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("Acoustic correlates of different /r/ realisations in F3")
#ggsave("graphs/r_acoustic_correlates.pdf", width=6, height=4)
We now create figure 4, that is, the proportions of different realisations as a function of time.
# calculating proportions
R.props.dec <- R.aud %>%
count(decade, gender, trans.broad) %>%
group_by(decade, gender) %>%
mutate(prop = n/sum(n)) %>%
ungroup()
# releveling
R.props.dec$trans.broad <- factor(R.props.dec$trans.broad, levels=c("o","i","wt","a","t","r"))
# creating the graph
ggplot(R.props.dec, aes(x=decade, fill=trans.broad)) +
geom_bar(aes(y=prop), stat="identity", position="stack") +
facet_wrap(~gender) +
scale_fill_manual(values = c("orange","darkorange1",
"darkorange3","deepskyblue1",
"deepskyblue3","deepskyblue4"),
name="/r/ realisation",
labels=c("zero","intermediate",
"weakened tap","approximant",
"tap","trill")) +
scale_y_continuous(labels = scales::percent, name="percentage of realisations") +
scale_x_continuous(name = "decade of recording") +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("Changes in proportions of auditory /r/ variants")
#ggsave("graphs/r_auditory.pdf", width=8, height=4)
We now run the logistic regression models whose results are reported in section 4.2 of the paper.
# creating a strength of /r/ variable and a presence of /r/ variable (both binary)
R.aud <- mutate(R.aud, r.strength=recode(trans.broad,
t="strong",
a="strong",
r="strong",
wt="weak",
i="weak",
o="weak"),
r.presence=recode(trans.broad,
t="present",
a="present",
r="present",
wt="present",
i="deleted",
o="deleted")
)
R.aud$r.strength <- factor(R.aud$r.strength, levels=c("weak","strong"))
R.aud$r.presence <- factor(R.aud$r.presence, levels=c("deleted","present"))
if (refit_please) {
# note the absence of random slopes over decade by word (not possible to include due to non-convergence)
# full model for strength
r.str.mod <- glmer(r.strength ~ scale(decade) * gender + stress +
(1 | speaker) +
(1 | word),
data=R.aud,
family="binomial")
#saveRDS(r.str.mod, "models/r.str.mod")
# nested model for strength
r.str.mod.min.decade <- glmer(r.strength ~ gender + stress +
(1 | speaker) +
(1 | word),
data=R.aud,
family="binomial")
#saveRDS(r.str.mod.min.decade, "models/r.str.mod.min.decade")
# note the absence of random slopes over decade by word (not possible to include due to non-convergence)
# full model for /r/ presence
r.pres.mod <- glmer(r.presence ~ scale(decade) * gender + stress +
(1 | speaker) +
(1 | word),
data=R.aud,
family="binomial",
control=glmerControl(optimizer="bobyqa",
optCtrl=list(maxfun=2e5)))
#saveRDS(r.pres.mod, "models/r.pres.mod")
# nested model for /r/ presence
r.pres.mod.min.decade <- glmer(r.presence ~ gender + stress +
(1 | speaker) +
(1 | word),
data=R.aud,
family="binomial")
#saveRDS(r.pres.mod.min.decade, "models/r.pres.mod.min.decade")
}
r.str.mod <- readRDS("models/r.str.mod")
r.str.mod.min.decade <- readRDS("models/r.str.mod.min.decade")
r.pres.mod <- readRDS("models/r.pres.mod")
r.pres.mod.min.decade <- readRDS("models/r.pres.mod.min.decade")
summary(r.str.mod)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: binomial ( logit )
Formula: r.strength ~ scale(decade) * gender + stress + (1 | speaker) + (1 | word)
Data: R.aud
AIC BIC logLik deviance df.resid
1031.4 1064.2 -508.7 1017.4 799
Scaled residuals:
Min 1Q Median 3Q Max
-1.6444 -0.7391 -0.5031 0.9313 3.7346
Random effects:
Groups Name Variance Std.Dev.
word (Intercept) 0.4511 0.6717
speaker (Intercept) 0.3346 0.5784
Number of obs: 806, groups: word, 138; speaker, 24
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.456095 0.254805 -1.790 0.0735 .
scale(decade) -0.275813 0.206426 -1.336 0.1815
gendermale 0.028435 0.287716 0.099 0.9213
stressschwa -0.004962 0.185419 -0.027 0.9786
scale(decade):gendermale 0.051526 0.288656 0.179 0.8583
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) scl(d) gndrml strsss
scale(decd) 0.031
gendermale -0.590 -0.029
stressschwa -0.469 0.001 0.023
scl(dcd):gn -0.014 -0.714 0.028 -0.027
summary(r.pres.mod)
Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
Family: binomial ( logit )
Formula: r.presence ~ scale(decade) * gender + stress + (1 | speaker) + (1 | word)
Data: R.aud
Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
AIC BIC logLik deviance df.resid
857.4 890.2 -421.7 843.4 799
Scaled residuals:
Min 1Q Median 3Q Max
-3.7662 -0.6148 0.3946 0.5393 1.7176
Random effects:
Groups Name Variance Std.Dev.
word (Intercept) 0.6589 0.8117
speaker (Intercept) 0.3310 0.5753
Number of obs: 806, groups: word, 138; speaker, 24
Fixed effects:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.52029 0.29174 5.211 1.88e-07 ***
scale(decade) -0.30539 0.21109 -1.447 0.1480
gendermale 0.41775 0.30091 1.388 0.1650
stressschwa -0.47407 0.21591 -2.196 0.0281 *
scale(decade):gendermale 0.03613 0.29750 0.121 0.9033
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) scl(d) gndrml strsss
scale(decd) -0.026
gendermale -0.502 0.025
stressschwa -0.510 0.026 -0.023
scl(dcd):gn 0.027 -0.712 -0.034 -0.045
# model comparisons reported in paper
anova(r.str.mod, r.str.mod.min.decade,test="Chisq")
Data: R.aud
Models:
r.str.mod.min.decade: r.strength ~ gender + stress + (1 | speaker) + (1 | word)
r.str.mod: r.strength ~ scale(decade) * gender + stress + (1 | speaker) +
r.str.mod: (1 | word)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
r.str.mod.min.decade 5 1030.3 1053.7 -510.14 1020.3
r.str.mod 7 1031.4 1064.2 -508.70 1017.4 2.8712 2 0.238
anova(r.pres.mod, r.pres.mod.min.decade,test="Chisq")
Data: R.aud
Models:
r.pres.mod.min.decade: r.presence ~ gender + stress + (1 | speaker) + (1 | word)
r.pres.mod: r.presence ~ scale(decade) * gender + stress + (1 | speaker) +
r.pres.mod: (1 | word)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
r.pres.mod.min.decade 5 856.85 880.31 -423.43 846.85
r.pres.mod 7 857.38 890.23 -421.69 843.38 3.4693 2 0.1765
First some plots showing the raw data for F1/F2/F3 broken down by vowels (these are especially useful as they show that all vowels show a rise in F39.
ggplot(vowels, aes(x=decade.fact, y=f1)) +
facet_grid(gender ~ vowel) +
geom_boxplot()
ggplot(vowels, aes(x=decade.fact, y=f2)) +
facet_grid(gender ~ vowel) +
geom_boxplot()
ggplot(vowels, aes(x=decade.fact, y=f3)) +
facet_grid(gender ~ vowel) +
geom_boxplot()
We’ll fit linear mixed models to F1/F2/F3, running the models in separate R-markdown chunks.
First F1:
# this is the model that we would like to have - but it
# does not converge due to a singularity involving
# the decade by vowel random slope
# f1.mod.full <- lmer(f1 ~ gender * decade.fact +
# scale(duration) +
# (1 + decade.fact | vowel) +
# (1 | Target.orthography) +
# (1 | Speaker),
# data=vowels, REML=F)
# these are two alternative models; model 1 forces the decade
# effect to be linear
if (refit_please) {
f1.mod.full.1 <- lmer(f1 ~ gender * scale(decade) +
scale(duration) +
(1 + scale(decade) | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f1.mod.full.1, "models/f1.mod.full.1")
# model 2 has no by-vowel random slope for decade (but the decade
# effect is allowed to be non-linear)
f1.mod.full.2 <- lmer(f1 ~ gender * decade.fact +
scale(duration) +
(1 | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f1.mod.full.2, "models/f1.mod.full.2")
# neither model shows a significant effect of decade:
# (and note that model 2 is expected to be anti-conservative with
# respect to decade!)
f1.mod.comp.1 <- lmer(f1 ~ gender + scale(duration) +
(1 + scale(decade) | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f1.mod.comp.1, "models/f1.mod.comp.1")
f1.mod.comp.2 <- lmer(f1 ~ gender + scale(duration) +
(1 | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f1.mod.comp.2, "models/f1.mod.comp.2")
}
f1.mod.full.1 <- readRDS("models/f1.mod.full.1")
f1.mod.full.2 <- readRDS("models/f1.mod.full.2")
f1.mod.comp.1 <- readRDS("models/f1.mod.comp.1")
f1.mod.comp.2 <- readRDS("models/f1.mod.comp.2")
anova(f1.mod.full.1, f1.mod.comp.1) # non-sig
Data: vowels
Models:
f1.mod.comp.1: f1 ~ gender + scale(duration) + (1 + scale(decade) | vowel) +
f1.mod.comp.1: (1 | Target.orthography) + (1 | Speaker)
f1.mod.full.1: f1 ~ gender * scale(decade) + scale(duration) + (1 + scale(decade) |
f1.mod.full.1: vowel) + (1 | Target.orthography) + (1 | Speaker)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
f1.mod.comp.1 9 85694 85756 -42838 85676
f1.mod.full.1 11 85695 85772 -42837 85673 2.381 2 0.3041
anova(f1.mod.full.2, f1.mod.comp.2) # non-sig
Data: vowels
Models:
f1.mod.comp.2: f1 ~ gender + scale(duration) + (1 | vowel) + (1 | Target.orthography) +
f1.mod.comp.2: (1 | Speaker)
f1.mod.full.2: f1 ~ gender * decade.fact + scale(duration) + (1 | vowel) + (1 |
f1.mod.full.2: Target.orthography) + (1 | Speaker)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
f1.mod.comp.2 7 85710 85759 -42848 85696
f1.mod.full.2 13 85717 85807 -42845 85691 5.4121 6 0.4921
F1 does not show a significant change.
# this is the model that we would like to have - but it
# does not converge due to a singularity involving
# the decade by vowel random slope
# f2.mod.full <- lmer(f2 ~ gender * decade.fact +
# scale(duration) +
# (1 + decade.fact | vowel) +
# (1 | Target.orthography) +
# (1 | Speaker),
# data=vowels, REML=F)
# these are two alternative models; model 1 forces the decade
# effect to be linear
if (refit_please) {
f2.mod.full.1 <- lmer(f2 ~ gender * scale(decade) +
scale(duration) +
(1 + scale(decade) | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f2.mod.full.1, "models/f2.mod.full.1")
# model 2 has no by-vowel random slope for decade (but the decade
# effect is allowed to be non-linear)
f2.mod.full.2 <- lmer(f2 ~ gender * decade.fact +
scale(duration) +
(1 | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f2.mod.full.2, "models/f2.mod.full.2")
# neither model shows a significant effect of decade:
# (and note that model 2 is expected to be anti-conservative with
# respect to decade!)
f2.mod.comp.1 <- lmer(f2 ~ gender + scale(duration) +
(1 + scale(decade) | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f2.mod.comp.1, "models/f2.mod.comp.1")
f2.mod.comp.2 <- lmer(f2 ~ gender + scale(duration) +
(1 | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f2.mod.comp.2, "models/f2.mod.comp.2")
}
f2.mod.full.1 <- readRDS("models/f2.mod.full.1")
f2.mod.comp.1 <- readRDS("models/f2.mod.comp.1")
f2.mod.full.2 <- readRDS("models/f2.mod.full.2")
f2.mod.comp.2 <- readRDS("models/f2.mod.comp.2")
anova(f2.mod.full.1, f2.mod.comp.1) # non-sig
Data: vowels
Models:
f2.mod.comp.1: f2 ~ gender + scale(duration) + (1 + scale(decade) | vowel) +
f2.mod.comp.1: (1 | Target.orthography) + (1 | Speaker)
f2.mod.full.1: f2 ~ gender * scale(decade) + scale(duration) + (1 + scale(decade) |
f2.mod.full.1: vowel) + (1 | Target.orthography) + (1 | Speaker)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
f2.mod.comp.1 9 101665 101727 -50823 101647
f2.mod.full.1 11 101664 101740 -50821 101642 5.0774 2 0.07897 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(f2.mod.full.2, f2.mod.comp.2) # non-sig
Data: vowels
Models:
f2.mod.comp.2: f2 ~ gender + scale(duration) + (1 | vowel) + (1 | Target.orthography) +
f2.mod.comp.2: (1 | Speaker)
f2.mod.full.2: f2 ~ gender * decade.fact + scale(duration) + (1 | vowel) + (1 |
f2.mod.full.2: Target.orthography) + (1 | Speaker)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
f2.mod.comp.2 7 102735 102783 -51360 102721
f2.mod.full.2 13 102742 102833 -51358 102716 4.4788 6 0.6122
F2 does not show a significant change.
Now for F3:
if (refit_please) {
f3.mod.full <- lmer(f3 ~ gender * decade.fact +
scale(duration) +
(1 + decade.fact | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f3.mod.full, "models/f3.mod.full")
#summary(f3.mod.full)
f3.mod.comp <- lmer(f3 ~ gender + scale(duration) +
(1 + decade.fact | vowel) +
(1 | Target.orthography) +
(1 | Speaker),
data=vowels, REML=F)
# saveRDS(f3.mod.comp, "models/f3.mod.comp")
}
f3.mod.full <- readRDS("models/f3.mod.full")
f3.mod.comp <- readRDS("models/f3.mod.comp")
anova(f3.mod.full, f3.mod.comp)
Data: vowels
Models:
f3.mod.comp: f3 ~ gender + scale(duration) + (1 + decade.fact | vowel) + (1 |
f3.mod.comp: Target.orthography) + (1 | Speaker)
f3.mod.full: f3 ~ gender * decade.fact + scale(duration) + (1 + decade.fact |
f3.mod.full: vowel) + (1 | Target.orthography) + (1 | Speaker)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
f3.mod.comp 16 102962 103073 -51465 102930
f3.mod.full 22 102952 103105 -51454 102908 22.166 6 0.00113 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
F3 clearly rises significantly over time. Prediction plot for the paper, i.e. figure 5:
# extracting predictions from the model
new.dat <- expand.grid(gender=unique(vowels$gender),
decade.fact=levels(vowels$decade.fact),
duration=median(vowels$duration),
vowel=unique(vowels$vowel)[1])
new.dat$f3 <- 0
new.dat$f3 <- predict(f3.mod.full, new.dat, re.form=NA)
mm <- model.matrix(terms(f3.mod.full),new.dat) # for confidence intervals
pvar1 <- diag(mm %*% tcrossprod(vcov(f3.mod.full),mm))
new.dat <- data.frame(
new.dat,
lower = new.dat$f3-2*sqrt(pvar1),
upper = new.dat$f3+2*sqrt(pvar1)
)
# creating the graph
mean_fun <- function(x){
return(data.frame(y = mean(x), label = paste0(" ", round(mean(x)), " Hz")))
}
ggplot(new.dat, aes(x=decade.fact, y=f3, col=gender)) +
geom_violin(data=vowels, aes(fill=gender, col=NA), alpha=0.2, show.legend=F) +
geom_point(size=2, position=position_dodge(width=0.9)) +
geom_errorbar(aes(ymin=lower, ymax=upper), width=0.25, position=position_dodge(width=0.9)) +
stat_summary(fun.data = mean_fun, geom = "text", size=4, position=position_dodge(0.9), hjust=0, show.legend=F) +
scale_color_manual(name="gender",breaks=c("F","M"), labels=c("female", "male"),
values=c("deepskyblue4","orange")) +
scale_fill_manual(name="gender",breaks=c("F","M"), labels=c("female", "male"),
values=c("deepskyblue4","orange"), guide=F) +
scale_x_discrete(name="decade of recording", labels=c(1970,1980,1990,2000), expand=expand_scale(add=c(0.05,1))) +
scale_y_continuous(name="F3") +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank()) +
ggtitle("F3 changes for vowels (with model predictions)")
#ggsave("graphs/vowels_f3_change.pdf", width=8, height=4)
We first fit a model that shows how F3 correlates with different auditory measures
vq.f3.model <- lm(avg.f3 ~ advanced.tip.blade +
tongue.body.height +
tongue.body.front.backness +
gender, data=R.vq)
summary(vq.f3.model)
Call:
lm(formula = avg.f3 ~ advanced.tip.blade + tongue.body.height +
tongue.body.front.backness + gender, data = R.vq)
Residuals:
Min 1Q Median 3Q Max
-232.20 -110.79 12.56 90.08 253.19
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2764.70 125.14 22.093 1.72e-14 ***
advanced.tip.blade -77.74 62.35 -1.247 0.228461
tongue.body.height 66.50 26.90 2.472 0.023643 *
tongue.body.front.backness -92.19 52.15 -1.768 0.094008 .
gendermale -346.30 77.08 -4.493 0.000281 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 165.6 on 18 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.6679, Adjusted R-squared: 0.5942
F-statistic: 9.052 on 4 and 18 DF, p-value: 0.0003441
We then fit three separate models (with decade centred and gender sum coded) to check for decade / gender effects in our three auditory measures.
# centring decade
R.vq$decade.centred <- scale(R.vq$decade, scale=F)
# sum coding gender
R.vq$gender.sum <- R.vq$gender
contrasts(R.vq$gender.sum) <- contr.sum(2)
# fitting and summarising models
summary(lm(tongue.body.height ~ decade.centred + gender.sum, data=R.vq))
Call:
lm(formula = tongue.body.height ~ decade.centred + gender.sum,
data = R.vq)
Residuals:
Min 1Q Median 3Q Max
-2.1958 -0.8708 -0.4042 0.8042 2.8458
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.93750 0.27147 -3.453 0.00238 **
decade.centred 0.04083 0.02428 1.682 0.10744
gender.sum1 0.52083 0.27147 1.919 0.06874 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.33 on 21 degrees of freedom
Multiple R-squared: 0.2366, Adjusted R-squared: 0.1639
F-statistic: 3.254 on 2 and 21 DF, p-value: 0.05873
summary(lm(tongue.body.front.backness ~ decade.centred + gender.sum, data=R.vq))
Call:
lm(formula = tongue.body.front.backness ~ decade.centred + gender.sum,
data = R.vq)
Residuals:
Min 1Q Median 3Q Max
-0.72083 -0.55000 -0.04167 0.37083 2.27917
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.687500 0.150355 -11.223 2.48e-10 ***
decade.centred -0.009167 0.013448 -0.682 0.503
gender.sum1 0.270833 0.150355 1.801 0.086 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7366 on 21 degrees of freedom
Multiple R-squared: 0.1501, Adjusted R-squared: 0.06918
F-statistic: 1.855 on 2 and 21 DF, p-value: 0.1812
summary(lm(advanced.tip.blade ~ decade.centred + gender.sum, data=R.vq))
Call:
lm(formula = advanced.tip.blade ~ decade.centred + gender.sum,
data = R.vq)
Residuals:
Min 1Q Median 3Q Max
-0.75535 -0.54625 -0.00076 0.40826 0.99924
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.58716 0.12417 12.783 4.42e-11 ***
decade.centred -0.00818 0.01091 -0.750 0.462
gender.sum1 0.04549 0.12417 0.366 0.718
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.5948 on 20 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.03418, Adjusted R-squared: -0.0624
F-statistic: 0.3539 on 2 and 20 DF, p-value: 0.7063
Create figure 6.
# getting counts of different tongue body height values per decade
R.vq.count <- dplyr::count(R.vq, tongue.body.height, decade)
# arranging by decade / tongue body height
R.vq.ordered <- R.vq %>%
arrange(decade, tongue.body.height)
# creating graph
ggplot(R.vq.ordered, aes(x=factor(decade), y=tongue.body.height)) +
geom_point(x=1,y=1, aes(colour="Female")) + # this is a hack for getting the legend right
geom_point(x=1,y=1, aes(colour="Male")) + # this is a hack for getting the legend right
scale_color_manual(name="Gender", values=c("deepskyblue4","orange"), breaks=c("Female","Male")) +
geom_dotplot(binaxis="y", stackdir="center", binpositions="all",
fill=ifelse(R.vq.ordered$gender=="female", "deepskyblue4","orange"),
col=NA, dotsize=1.5) +
stat_summary(fun.y=mean, fun.ymin=mean, fun.ymax=mean, geom="crossbar", aes(group=decade), width=0.4, col="grey") +
scale_x_discrete(name="Decade of recording") +
scale_y_continuous(name="Tongue body height") +
guides(colour = guide_legend(override.aes = list(size = 4))) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("Changes in tongue body height over time")
# ggsave("graphs/tongue_body_height.pdf", width=6, height=4)
Same modelling strategy as above.
if (refit_please) {
# model without AR
system.time(
R.m.baseline.mod.no.AR <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m)
)
# saveRDS(R.m.baseline.mod.no.AR, "models/R.m.baseline.mod.no.AR")
# extracting empirical autocorrelation values at lag 1
R.m.baseline.rho <- start_value_rho(R.m.baseline.mod.no.AR)
# full model with AR
system.time(
R.m.baseline.mod.AR <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.baseline.rho)
)
# saveRDS(R.m.baseline.mod.AR, "models/R.m.baseline.mod.AR")
summary(R.m.baseline.mod.AR)
# nested model
system.time(
R.m.baseline.mod.AR.min.decade <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
#s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.m, method="ML",
AR.start=R.m$start.event, rho=R.m.baseline.rho)
)
# saveRDS(R.m.baseline.mod.AR.min.decade, "models/R.m.baseline.mod.AR.min.decade")
}
R.m.baseline.mod.AR <- readRDS("models/R.m.baseline.mod.AR")
R.m.baseline.mod.AR.min.decade <- readRDS("models/R.m.baseline.mod.AR.min.decade")
compareML(R.m.baseline.mod.AR, R.m.baseline.mod.AR.min.decade)
R.m.baseline.mod.AR: f3 ~ stress + avg.f3 + s(measurement.no) + s(decade, k = 4) +
s(duration) + s(preceding.front, k = 3) + s(measurement.no,
by = stress) + s(decade, k = 4, by = stress) + ti(measurement.no,
decade, k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.m.baseline.mod.AR.min.decade: f3 ~ stress + avg.f3 + s(measurement.no) + s(duration) + s(preceding.front,
k = 3) + s(measurement.no, by = stress) + ti(measurement.no,
duration) + ti(measurement.no, preceding.front, k = c(10,
3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
s(measurement.no, word, bs = "fs", m = 1, k = 4)
Chi-square test of ML scores
-----
AIC difference: -0.04, model R.m.baseline.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.848567, rho2 = 0.848567). Only small difference in ML...
Creating the bottom panel of figure 7.
# extracting predictions, formatting decade
R.m.baseline.full.plot <- plot_smooth(R.m.baseline.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="full"), rm.ranef=T, n.grid=100)$fv
Predictor decade is not a factor.
Summary:
* stress : factor; set to the value(s): full.
* avg.f3 : numeric predictor; set to the value(s): 2405.03714710253.
* measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
* decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
* duration : numeric predictor; set to the value(s): 0.122709575.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-m06. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
R.m.baseline.schwa.plot <- plot_smooth(R.m.baseline.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="schwa"), rm.ranef=T, n.grid=100)$fv
Predictor decade is not a factor.
Summary:
* stress : factor; set to the value(s): schwa.
* avg.f3 : numeric predictor; set to the value(s): 2405.03714710253.
* measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
* decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
* duration : numeric predictor; set to the value(s): 0.122709575.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): labial. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-m06. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
R.m.baseline.plot <- rbind(R.m.baseline.full.plot, R.m.baseline.schwa.plot)
R.m.baseline.plot$decade <- factor(R.m.baseline.plot$decade, levels=c("1970","1980","1990","2000"))
# create graph
ggplot(R.m.baseline.plot, aes(x=measurement.no, y=fit, group=decade, col=decade, lty=decade)) +
geom_line(lwd=1) +
facet_grid(.~stress) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=decade), col=NA,col="grey", alpha=0.1) +
scale_color_manual(name="decade of\nrecording", values=c("orange","darkorange3","deepskyblue1","deepskyblue4")) +
scale_linetype_discrete(name="decade of\nrecording") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2000, 3050)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("F3 changes for /r/ in males (controlling for baseline F3)")
Duplicated aesthetics after name standardisation: colour
#ggsave("graphs/r_male_f3_change_control.pdf", width=8, height=4)
Same modelling strategy as above.
if (refit_please) {
# model without AR
system.time(
R.f.baseline.mod.no.AR <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f)
)
# saveRDS(R.f.baseline.mod.no.AR, "models/R.f.baseline.mod.no.AR")
# extracting empirical autocorrelation value at lag 1
R.f.baseline.rho <- start_value_rho(R.f.baseline.mod.no.AR)
# full model with AR
system.time(
R.f.baseline.mod.AR <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
s(decade, k=4, by=stress) +
# level 2: 2d smooths
ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.baseline.rho)
)
# saveRDS(R.f.baseline.mod.AR, "models/R.f.baseline.mod.AR")
summary(R.f.baseline.mod.AR)
# nested model
system.time(
R.f.baseline.mod.AR.min.decade <- bam(f3 ~ stress +
avg.f3 +
# level 1 smooth terms
s(measurement.no) +
#s(decade, k=4) +
s(duration) +
s(preceding.front, k=3) +
# level 2: by terms
s(measurement.no, by=stress) +
#s(decade, k=4, by=stress) +
# level 2: 2d smooths
#ti(measurement.no, decade, k=c(10,4)) +
ti(measurement.no, duration) +
ti(measurement.no, preceding.front, k=c(10,3)) +
# level 3: 2d smooth by terms
#ti(measurement.no, decade, k=c(10,4), by=stress) +
# random smooths
s(measurement.no, foll.broad.f3, bs="fs", m=1, k=4) +
s(measurement.no, speaker, bs="fs", m=1, k=4) +
s(measurement.no, word, bs="fs", m=1, k=4),
dat=R.f, method="ML",
AR.start=R.f$start.event, rho=R.f.baseline.rho)
)
# saveRDS(R.f.baseline.mod.AR.min.decade, "models/R.f.baseline.mod.AR.min.decade")
}
R.f.baseline.mod.AR <- readRDS("models/R.f.baseline.mod.AR")
R.f.baseline.mod.AR.min.decade <- readRDS("models/R.f.baseline.mod.AR.min.decade")
compareML(R.f.baseline.mod.AR, R.f.baseline.mod.AR.min.decade)
R.f.baseline.mod.AR: f3 ~ stress + avg.f3 + s(measurement.no) + s(decade, k = 4) +
s(duration) + s(preceding.front, k = 3) + s(measurement.no,
by = stress) + s(decade, k = 4, by = stress) + ti(measurement.no,
decade, k = c(10, 4)) + ti(measurement.no, duration) + ti(measurement.no,
preceding.front, k = c(10, 3)) + ti(measurement.no, decade,
k = c(10, 4), by = stress) + s(measurement.no, foll.broad.f3,
bs = "fs", m = 1, k = 4) + s(measurement.no, speaker, bs = "fs",
m = 1, k = 4) + s(measurement.no, word, bs = "fs", m = 1,
k = 4)
R.f.baseline.mod.AR.min.decade: f3 ~ stress + avg.f3 + s(measurement.no) + s(duration) + s(preceding.front,
k = 3) + s(measurement.no, by = stress) + ti(measurement.no,
duration) + ti(measurement.no, preceding.front, k = c(10,
3)) + s(measurement.no, foll.broad.f3, bs = "fs", m = 1,
k = 4) + s(measurement.no, speaker, bs = "fs", m = 1, k = 4) +
s(measurement.no, word, bs = "fs", m = 1, k = 4)
Chi-square test of ML scores
-----
AIC difference: -7.88, model R.f.baseline.mod.AR has lower AIC.
AIC might not be reliable, as an AR1 model is included (rho1 = 0.850645, rho2 = 0.850645).
Creating the top panel of figure 7.
R.f.baseline.full.plot <- plot_smooth(R.f.baseline.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="full"), rm.ranef=T, n.grid=100)$fv
Predictor decade is not a factor.
Summary:
* stress : factor; set to the value(s): full.
* avg.f3 : numeric predictor; set to the value(s): 2717.01597051597.
* measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
* decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
* duration : numeric predictor; set to the value(s): 0.103923377.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): coronal. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
R.f.baseline.schwa.plot <- plot_smooth(R.f.baseline.mod.AR, view="measurement.no", plot_all="decade",
cond=list(stress="schwa"), rm.ranef=T, n.grid=100)$fv
Predictor decade is not a factor.
Summary:
* stress : factor; set to the value(s): schwa.
* avg.f3 : numeric predictor; set to the value(s): 2717.01597051597.
* measurement.no : numeric predictor; with 100 values ranging from 0.000000 to 10.000000.
* decade : numeric predictor; with 4 values ranging from 1970.000000 to 2000.000000.
* duration : numeric predictor; set to the value(s): 0.103923377.
* preceding.front : numeric predictor; set to the value(s): 1.
* foll.broad.f3 : factor; set to the value(s): coronal. (Might be canceled as random effect, check below.)
* speaker : factor; set to the value(s): 00-O-f01. (Might be canceled as random effect, check below.)
* word : factor; set to the value(s): were. (Might be canceled as random effect, check below.)
* NOTE : The following random effects columns are canceled: s(measurement.no,foll.broad.f3),s(measurement.no,speaker),s(measurement.no,word)
R.f.baseline.plot <- rbind(R.f.baseline.full.plot, R.f.baseline.schwa.plot)
R.f.baseline.plot$decade <- factor(R.f.baseline.plot$decade, levels=c("1970","1980","1990","2000"))
# creating graph
ggplot(R.f.baseline.plot, aes(x=measurement.no, y=fit, group=decade, col=decade, lty=decade)) +
geom_line(lwd=1) +
facet_grid(.~stress) +
geom_ribbon(aes(ymin=ll, ymax=ul, group=decade), col=NA,col="grey", alpha=0.1) +
scale_color_manual(name="decade of\nrecording", values=c("orange","darkorange3","deepskyblue1","deepskyblue4")) +
scale_linetype_discrete(name="decade of\nrecording") +
scale_x_continuous(name="measurement point", limits = c(0, 10), breaks=seq(0,10,2)) +
scale_y_continuous(name="F3 (Hz)", limits=c(2000, 3050)) +
theme_bw() +
theme(axis.title = element_text(size=14),
axis.text = element_text(size=12),
legend.title = element_text(size=14, face="bold"),
legend.text=element_text(size=12),
plot.title=element_text(size=14, face="bold"),
panel.grid=element_blank(),
strip.text=element_text(size=12)) +
ggtitle("F3 changes for /r/ in females (controlling for baseline F3)")
Duplicated aesthetics after name standardisation: colour
#ggsave("graphs/r_female_f3_change_control.pdf", width=8, height=4)